# -*- coding: utf-8 -*-
import scrapy
from duanwenxue.items import Posts


class DwxSpider(scrapy.Spider):
    """Crawl duanwenxue.com listing pages and emit one ``Posts`` item per article.

    Request flow: ``parse`` (navigation links on the start page) ->
    ``get_classify`` (article links on a listing page, plus the next
    listing page) -> ``get_article`` (field extraction).
    """

    name = 'dwx'
    allowed_domains = ['www.duanwenxue.com']
    start_urls = ['http://www.duanwenxue.com/']

    def parse(self, response):
        """Yield a listing-page request for every link in the ``subnav`` menu.

        Each request is handled by ``get_classify``.
        """
        for sel in response.xpath('//div[@class="subnav"]/ul/li'):
            link = sel.xpath('a/@href').extract_first()
            if not link:
                # No direct <a href> in this <li>: urljoin() would resolve to
                # the current page and schedule a useless request.
                continue
            yield scrapy.Request(
                url=response.urljoin(link),
                callback=self.get_classify,
            )

    def get_classify(self, response):
        """Yield article requests for a listing page, then follow pagination.

        Article requests are handled by ``get_article``; the pagination
        request is handled by this method again.
        """
        for sel in response.xpath('//div[@class="list-base-article"]/ul/li'):
            link = sel.xpath('a/@href').extract_first()
            if not link:
                continue
            yield scrapy.Request(
                url=response.urljoin(link),
                callback=self.get_article,
            )

        # NOTE(review): the third-from-last pager entry is presumably the
        # "next page" link on this site -- confirm against the live markup.
        next_page = response.xpath(
            '//div[@class="list-pages"]/ul/li[last()-2]/a/@href'
        ).extract_first()
        self.logger.debug('next page link: %s', next_page)
        if next_page:
            # Without this guard a missing link resolves to the current page
            # URL and the same listing is requested again.
            yield scrapy.Request(
                url=response.urljoin(next_page),
                callback=self.get_classify,
            )

    def get_article(self, response):
        """Extract title, author, avatar and content from an article page.

        Yields a single ``Posts`` item. ``title``, ``author`` and ``avatar``
        are the first XPath match (``None`` when nothing matches);
        ``content`` is the list of serialized ``<p>`` elements found under
        the ``article-content`` div.
        """
        item = Posts()
        item['title'] = response.xpath('//div[@class="row-article"]/h1/text()').extract_first()
        item['author'] = response.xpath('//div[@class="face"]/a/span/text()').extract_first()
        item['avatar'] = response.xpath('//div[@class="face"]/a/img/@src').extract_first()
        item['content'] = response.xpath('//div[@class="article-content"]/p').extract()
        yield item
